*Data Appendix*
*Creating Table 5 (R&D analysis):
cd "Directory/Replication files"

*converting ASCII (CSV) files into Stata files.
*-insheet, clear- replaces whatever is in memory, so this section can be
*re-run without first emptying the dataset by hand.
*Every lookup file is sorted on the key(s) it is later merged on, because
*the old-style -merge varlist using- calls in this script require the using
*dataset to be sorted on the match variables. -stable- keeps the original
*CSV row order among observations that share the same key.

*Compustat panel (master dataset; it is sorted later, right before each merge):
insheet using compustat19552008.csv, clear
save compustat19552008.dta, replace

*GDP deflator, merged on year:
insheet using gdp_deflator.csv, clear
sort year, stable
save gdp_deflator.dta, replace

*CMS survey data, merged on sic3:
insheet using CMSdata.csv, clear
sort sic3, stable
save CMSdata.dta, replace

*industry output, merged on sic3 and year:
insheet using industry_output.csv, clear
sort sic3 year, stable
save industry_output.dta, replace

*external finance measure, merged on sic3:
insheet using ExternalFinance.csv, clear
sort sic3, stable
save ExternalFinance.dta, replace

*leave memory empty, as the original conversion step did:
clear

*construct the estimation sample:
*load the Compustat file and make the industry and firm identifiers numeric;
*observations without a usable sic or gvkey are discarded
use compustat19552008.dta, clear //Compustat Data
destring sic gvkey, replace
drop if sic==. | gvkey==.
rename fyear year
*fixing mistakes in data (datadate is matched on its numeric value):
format datadate %d
*one observation of gvkey 66399 is reassigned from year 2005 to 2004
replace year=2004 if gvkey==66399 & year==2005 & datadate==16587
*two observations of gvkey 6557 are removed
drop if gvkey==6557 & inlist(datadate, 8400, 8765)

*deflating compustat variables: attach the GDP deflator by year
sort year
merge year using gdp_deflator.dta
tabulate _merge
drop if _merge==2 //rows that exist only in the deflator file
drop _merge

*missing values of the accounting variables are set to zero
foreach v in revt at lt ppent dltt dd1 dpc ibc emp xrd {
    replace `v'=0 if `v'==.
}
*negative values of revenue, dd1 and R&D are also set to zero
foreach v in revt dd1 xrd {
    replace `v'=0 if `v'<0
}

*keep each nominal series as <name>_orig and rebuild <name> as the
*deflated series: (nominal/gdp_deflator)*100
*(emp is not deflated)
foreach v in xrd revt ppent at lt dd1 dltt dpc ibc {
    rename `v' `v'_orig
}
foreach v in xrd revt ppent at lt dd1 dltt dpc ibc {
    gen `v'=(`v'_orig/gdp_deflator)*100
}

*cash-flow measures: after R&D (dpc+ibc) and before R&D (adds xrd back)
gen cash_after=dpc+ibc
gen cash_before=dpc+ibc+xrd
*cash_after is shifted up by 1000 before its log is taken below
gen cash_after_scaled=cash_after+1000
summarize cash_after_scaled, detail

*logs of (1 + level); the first five have their own naming
gen ln_rd=ln(xrd+1)
gen ln_sales=ln(revt+1)
gen ln_cashafter=ln(cash_after_scaled+1)
gen ln_cashbefore=ln(cash_before+1)
gen ln_emp=ln(emp+1)
foreach v in at lt ppent dd1 dltt {
    gen ln_`v'=ln(`v'+1)
}

*within-firm first differences of the logs, and the lag of each difference.
*NOTE: [_n-1] is the previous observation of the same gvkey in year order,
*which is the previous calendar year only if the firm has no gaps.
sort gvkey year
by gvkey: gen delta_lnrd=ln_rd-ln_rd[_n-1]
by gvkey: gen delta_lnlagrd=delta_lnrd[_n-1]
by gvkey: gen delta_lncashbefore=ln_cashbefore-ln_cashbefore[_n-1]
by gvkey: gen delta_lncashafter=ln_cashafter-ln_cashafter[_n-1]
by gvkey: gen delta_lncashafterlag=delta_lncashafter[_n-1]
foreach v in at lt ppent dd1 dltt {
    by gvkey: gen delta_ln`v'=ln_`v'-ln_`v'[_n-1]
    by gvkey: gen delta_ln`v'lag=delta_ln`v'[_n-1]
}

*merging in CMS survey data at the 3-digit SIC level
gen sic3=int(sic/10) //drops the last digit of sic
sort sic3
merge sic3 using CMSdata.dta
tabulate _merge
drop if _merge==2 //rows that exist only in the CMS file
drop _merge

*average of q27a and q27b, weighted by q46a and q46b
gen fast_cont=(q27a*q46a+q27b*q46b)/(q46a+q46b)

*map the scores q32b and q33b into percentages:
*  exactly 1 -> 5, (1,2] -> 30, (2,3] -> 50, (3,4] -> 75, above 4 -> 95
*scores that are missing or below 1 stay missing
local scores q32b q33b
local shares q32product q33process
forvalues i=1/2 {
    local s : word `i' of `scores'
    local p : word `i' of `shares'
    gen `p'=5 if `s'==1
    replace `p'=30 if `s'>1 & `s'<=2
    replace `p'=50 if `s'>2 & `s'<=3
    replace `p'=75 if `s'>3 & `s'<=4
    replace `p'=95 if `s'>4 & `s'!=.
    summarize `p'
}

*average of the two percentages, weighted by q46a (q33process) and
*q46b (q32product), then rescaled from percent to a fraction
gen pat_effect=(q33process*q46a+q32product*q46b)/(q46a+q46b)
replace pat_effect=pat_effect/100
summarize pat_effect, detail

*merging in manufacturing sector industry output by sic3 and year
sort sic3 year
merge sic3 year using industry_output.dta
tabulate _merge
drop if _merge==2 //rows that exist only in the industry output file
drop _merge

*log of (1 + rgout) and its within-firm first difference
sort gvkey year
gen ln_rgout=ln(rgout+1)
by gvkey: gen delta_lnrgout=ln_rgout-ln_rgout[_n-1]

*merging in measure of external finance at sic3 level
sort sic3
merge sic3 using ExternalFinance
tabulate _merge
drop if _merge==2 //rows that exist only in the external finance file
drop _merge

*interaction terms.
*NOTE(review): neg_rgout and external are not created in this script;
*presumably they arrive with the merged files - confirm.
gen ln_rgoutXfastcont=ln_rgout*fast_cont
local tags fastcont neg pateff ext
local mods fast_cont neg_rgout pat_effect external
forvalues i=1/4 {
    local t : word `i' of `tags'
    local m : word `i' of `mods'
    gen delta_lnrgoutX`t'=delta_lnrgout*`m'
}

*restrict the sample to years after 1974 and before 2003
*(observations with a missing year are dropped as well)
drop if year<=1974 | year>=2003
*one dummy per sample year: yy1, yy2, ...
tabulate year, generate(yy)
*declare the firm-year panel structure
tsset gvkey year

*final estimation sample
save RD_analysis, replace

*R&D analysis:
*Seven OLS specifications of delta_lnrd, all restricted to observations
*with non-missing fast_cont and with standard errors clustered by gvkey.

*firm-level controls shared by every specification
local controls delta_lncashafter delta_lncashafterlag ///
    delta_lnat delta_lnatlag delta_lnlt delta_lnltlag ///
    delta_lndltt delta_lndlttlag delta_lndd1 delta_lndd1lag ///
    delta_lnppent delta_lnppentlag
*each interaction with output growth, followed by its main effect
local pat delta_lnrgoutXpateff pat_effect
local fast delta_lnrgoutXfastcont fast_cont
local ext delta_lnrgoutXext external

*(1) output growth and controls only
regress delta_lnrd delta_lnrgout `controls' if fast_cont!=., cluster(gvkey)
*(2) adds the year dummies
regress delta_lnrd delta_lnrgout `controls' yy* if fast_cont!=., cluster(gvkey)
*(3) adds the pat_effect interaction
regress delta_lnrd delta_lnrgout `pat' `controls' yy* if fast_cont!=., cluster(gvkey)
*(4) adds the fast_cont interaction instead
regress delta_lnrd delta_lnrgout `fast' `controls' yy* if fast_cont!=., cluster(gvkey)
*(5) both interactions
regress delta_lnrd delta_lnrgout `pat' `fast' `controls' yy* if fast_cont!=., cluster(gvkey)
*(6) adds the external finance interaction
regress delta_lnrd delta_lnrgout `pat' `fast' `ext' `controls' yy* if fast_cont!=., cluster(gvkey)
*(7) same as (6) without the year dummies
regress delta_lnrd delta_lnrgout `pat' `fast' `ext' `controls' if fast_cont!=., cluster(gvkey)
